# Read the raw survey exports (one row per respondent).
# NOTE(review): absolute, user-specific paths -- the script will only run on
# this machine; consider relative paths or here::here() for portability.
student <- read.csv('/Users/feiyasuo/Documents/GitHub/consulting-project-pandemic-survey/student.csv')
faculty <- read.csv('/Users/feiyasuo/Documents/GitHub/consulting-project-pandemic-survey/faculty.csv')
In this section, I analyze multiple-choice questions in the survey. The figures show the count for each option with a 95% confidence interval. If the confidence intervals of any two bars do not overlap, it indicates that the numbers of respondents choosing the two options are significantly different.
# Q1: count each non-blank answer; table(...)[-1] drops the first tabulated
# level (presumably a leftover blank level -- TODO confirm against the data).
student_q1 <- data.frame(table(student[student$q1!='',]$q1)[-1])
student_q1$prop <- student_q1$Freq/sum(student_q1$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q1$sd <- sqrt(student_q1$prop*(1-student_q1$prop)*sum(student_q1$Freq))
ggplot(student_q1) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q1: Preferred Mode of Learning") +
  xlab("") +
  coord_flip()
# Q2: count each non-blank answer and drop the first row (leftover blank
# level -- TODO confirm).
student_q2 <- data.frame(table(student[student$q2!='',]$q2))
student_q2 <- student_q2[-1,]
student_q2$prop <- student_q2$Freq/sum(student_q2$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q2$sd <- sqrt(student_q2$prop*(1-student_q2$prop)*sum(student_q2$Freq))
ggplot(student_q2) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q2: Preferred Method of Accessing Lecture Material") +
  xlab("") +
  coord_flip()
# Q3: count each non-blank answer and drop the leftover blank level.
student_q3 <- data.frame(table(student[student$q3!='',]$q3))
student_q3 <- student_q3[-1,]
student_q3$prop <- student_q3$Freq/sum(student_q3$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q3$sd <- sqrt(student_q3$prop*(1-student_q3$prop)*sum(student_q3$Freq))
ggplot(student_q3) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q3: How often will you attend in-person lectures") +
  xlab("") +
  coord_flip()
# Q6: Likert-style item; relevel so bars appear in easy -> difficult order.
student_q6 <- data.frame(table(student[student$q6!='',]$q6))
student_q6 <- student_q6[-1,]
student_q6$Var1 <- factor(student_q6$Var1, levels = c("Very easy", "Easy",
                                                     "Neither difficult nor easy", "Difficult",
                                                     "Very difficult"))
student_q6$prop <- student_q6$Freq/sum(student_q6$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q6$sd <- sqrt(student_q6$prop*(1-student_q6$prop)*sum(student_q6$Freq))
ggplot(student_q6) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q6: connecting with other students and faculty in small groups") +
  xlab("") +
  coord_flip()
# Q7: frequency item; relevel so bars appear in Often -> Never order.
student_q7 <- data.frame(table(student[student$q7!='',]$q7))
student_q7 <- student_q7[-1,]
student_q7$Var1 <- factor(student_q7$Var1, levels = c("Often", "Sometimes", "Rarely", "Never"))
student_q7$prop <- student_q7$Freq/sum(student_q7$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q7$sd <- sqrt(student_q7$prop*(1-student_q7$prop)*sum(student_q7$Freq))
ggplot(student_q7) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q7: How often did you meet with your small group?") +
  xlab("") +
  coord_flip()
# Q9: agreement item; relevel so bars appear in agree -> disagree order.
student_q9 <- data.frame(table(student[student$q9!='',]$q9))
student_q9 <- student_q9[-1,]
student_q9$Var1 <- factor(student_q9$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                     "Disagree", "Strongly disagree"))
student_q9$prop <- student_q9$Freq/sum(student_q9$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q9$sd <- sqrt(student_q9$prop*(1-student_q9$prop)*sum(student_q9$Freq))
ggplot(student_q9) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q9: The virtual curriculum affected your ability for self care") +
  xlab("") +
  coord_flip()
# Q10: agreement item; relevel so bars appear in agree -> disagree order.
student_q10 <- data.frame(table(student[student$q10!='',]$q10))
student_q10 <- student_q10[-1,]
student_q10$Var1 <- factor(student_q10$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                       "Disagree", "Strongly disagree"))
student_q10$prop <- student_q10$Freq/sum(student_q10$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q10$sd <- sqrt(student_q10$prop*(1-student_q10$prop)*sum(student_q10$Freq))
ggplot(student_q10) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q10: Virtual activities impacted the quality of your clinical skills education") +
  xlab("") +
  coord_flip()
# Q11: agreement item; relevel so bars appear in agree -> disagree order.
student_q11 <- data.frame(table(student[student$q11!='',]$q11))
student_q11 <- student_q11[-1,]
student_q11$Var1 <- factor(student_q11$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                       "Disagree", "Strongly disagree"))
student_q11$prop <- student_q11$Freq/sum(student_q11$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q11$sd <- sqrt(student_q11$prop*(1-student_q11$prop)*sum(student_q11$Freq))
ggplot(student_q11) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q11: COVID-19 affected the extent of your involvement in community service") +
  xlab("") +
  coord_flip()
# Q12: agreement item; relevel so bars appear in agree -> disagree order.
student_q12 <- data.frame(table(student[student$q12!='',]$q12))
student_q12 <- student_q12[-1,]
student_q12$Var1 <- factor(student_q12$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                       "Disagree", "Strongly disagree"))
student_q12$prop <- student_q12$Freq/sum(student_q12$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q12$sd <- sqrt(student_q12$prop*(1-student_q12$prop)*sum(student_q12$Freq))
ggplot(student_q12) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  # Typo fix in the displayed title: "instructorsl" -> "instructors".
  ggtitle("Q12: Your choice of specialty will be affected due to virtual \n versus in-person interactions with instructors") +
  xlab("") +
  coord_flip()
# Q13: agreement item; relevel so bars appear in agree -> disagree order.
student_q13 <- data.frame(table(student[student$q13!='',]$q13))
student_q13 <- student_q13[-1,]
student_q13$Var1 <- factor(student_q13$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                       "Disagree", "Strongly disagree"))
student_q13$prop <- student_q13$Freq/sum(student_q13$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q13$sd <- sqrt(student_q13$prop*(1-student_q13$prop)*sum(student_q13$Freq))
ggplot(student_q13) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q13: You were able to connect to students in other class years \n and instructors to identify potential mentors using the Zoom format") +
  xlab("") +
  coord_flip()
# Q14: agreement item; relevel so bars appear in agree -> disagree order.
student_q14 <- data.frame(table(student[student$q14!='',]$q14))
student_q14 <- student_q14[-1,]
student_q14$Var1 <- factor(student_q14$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                       "Disagree", "Strongly disagree"))
student_q14$prop <- student_q14$Freq/sum(student_q14$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
student_q14$sd <- sqrt(student_q14$prop*(1-student_q14$prop)*sum(student_q14$Freq))
ggplot(student_q14) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  # Capitalization fixed for consistency with the other titles ("q14" -> "Q14").
  ggtitle("Q14: I feel confident that I belong at UNC School of Medicine") +
  xlab("") +
  coord_flip()
In this section, I analyze the open-ended questions. For each question, I use a word cloud to show the most frequent words that respondents mentioned in the survey. The larger a word appears, the more frequently it was mentioned.
# Word cloud for Q4 (open-ended): clean the free-text answers, then plot
# the 200 most frequent terms.
student_q4 <- student$q4
docs <- Corpus(VectorSource(student_q4))
# Normalize the text: strip numbers and punctuation, squeeze whitespace,
# lower-case everything, and remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
df <- df[-1, ]  # drop the single most frequent term -- presumably uninformative; verify
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
# Word cloud for Q5 (open-ended): same cleaning pipeline as Q4.
student_q5 <- student$q5
docs <- Corpus(VectorSource(student_q5))
# Normalize: strip numbers/punctuation, squeeze whitespace, lower-case,
# remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
df <- df[c(-1, -3, -4), ]  # drop specific high-frequency terms -- presumably uninformative; verify rows
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
# Word cloud for Q15 (open-ended): same cleaning pipeline; no terms dropped.
student_q15 <- student$q15
docs <- Corpus(VectorSource(student_q15))
# Normalize: strip numbers/punctuation, squeeze whitespace, lower-case,
# remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
This table shows the level of dependency between each pair of the multiple-choice questions. The numbers here are p-values. If a value is smaller than 0.05, we reject the null hypothesis that the two questions are independent, indicating that the two questions are correlated with each other.
# Pairwise chi-square tests of independence between the 12 multiple-choice
# questions (columns 3:5 and 8:16), restricted to respondents who answered q1.
# apply(..., 2, as.character) deliberately coerces to a character matrix.
student_cat <- apply(student[student$q1!="", c(3:5,8:16)], 2, as.character)
n_q <- ncol(student_cat)
# Lower triangle holds p-values; dimnames make the table self-describing.
p_value <- matrix(nrow = n_q, ncol = n_q,
                  dimnames = list(colnames(student_cat), colnames(student_cat)))
for (i in seq_len(n_q - 1)) {
  for (j in (i+1):n_q) {
    temp <- table(student_cat[,i], student_cat[,j])
    # Monte Carlo p-value: expected cell counts are likely small, so the
    # asymptotic chi-square approximation would be unreliable.
    p_value[j,i] <- chisq.test(temp, simulate.p.value = TRUE)$p.value
  }
}
The PCA analysis can show potential clusters inside the survey results. From the PCA loadings, we can see that all questions contribute similarly to PC1, except for question 3. The figure also shows that all questions other than question 3 drive the scores in one direction (toward the upper right corner). Thus, there are no obvious clusters from the PCA analysis.
# Convert the ordered categorical answers to numeric scores for PCA.
# Bug fix (review): the original did `levels(f) <- new; as.numeric(f)`, but
# as.numeric() on a factor returns the internal level codes 1..k, NOT the
# assigned labels -- so the reverse-coded scales (e.g. c(0,5,4,3,2,1)) were
# silently ignored. Round-tripping through as.character() applies the
# intended numeric labels. NOTE: the printed rotation below was produced by
# the pre-fix coding and will change on re-knit.
recode_numeric <- function(x, new_levels) {
  f <- as.factor(x)  # tolerate character columns (stringsAsFactors = FALSE)
  levels(f) <- new_levels
  as.numeric(as.character(f))
}
student_num <- student[c(5,8:16)]
student_num$q3 <- recode_numeric(student_num$q3, c(0,3,4))
student_num$q6 <- recode_numeric(student_num$q6, c(0,1,2,3,4,5))
student_num$q7 <- recode_numeric(student_num$q7, c(5,4,3,2,1))
student_num$q8 <- recode_numeric(student_num$q8, c(0,5,4,3,2,1))
student_num$q9 <- recode_numeric(student_num$q9, c(0,5,4,3,2,1))
student_num$q10 <- recode_numeric(student_num$q10, c(0,5,4,3,2,1))
student_num$q11 <- recode_numeric(student_num$q11, c(0,5,4,3,2,1))
student_num$q12 <- recode_numeric(student_num$q12, c(0,5,4,3,2,1))
student_num$q13 <- recode_numeric(student_num$q13, c(0,5,4,3,2,1))
student_num$q14 <- recode_numeric(student_num$q14, c(0,5,4,3,2,1))
# Principal components on standardized scores (each question weighted equally).
student_pca <- prcomp(student_num, center = TRUE, scale. = TRUE)
student_pca
## Standard deviations (1, .., p=10):
## [1] 2.3251163 1.0237261 0.8703568 0.7721940 0.7566940 0.6392828 0.6095487
## [8] 0.5509207 0.5484107 0.4846945
##
## Rotation (n x k) = (10 x 10):
## PC1 PC2 PC3 PC4 PC5 PC6
## q3 0.1300805 -0.87875785 0.063488635 -0.3473311 0.05732723 -0.03114513
## q6 0.3439932 -0.14644081 0.233113644 0.4235644 0.02426716 -0.27450239
## q7 0.3003324 0.24650549 0.318057744 -0.2906026 -0.66183689 0.17801382
## q8 0.3253917 -0.08656831 -0.531142482 -0.1831559 -0.18717526 0.45048975
## q9 0.3445623 -0.06287720 -0.098039630 0.3185741 -0.37305102 -0.30812860
## q10 0.3223695 0.21117543 0.005649359 -0.5896929 0.21598407 -0.34827891
## q11 0.3346888 0.26629253 -0.098627167 -0.1504568 0.38808260 -0.25614961
## q12 0.3598480 -0.12205262 0.176654285 0.1480359 -0.02243026 -0.11600739
## q13 0.3271367 0.03642792 -0.507362352 0.2775345 0.17813486 0.17055895
## q14 0.3122460 0.04895455 0.500359733 0.1056121 0.39294097 0.60385955
## PC7 PC8 PC9 PC10
## q3 -0.008785297 0.184738234 -0.07415377 -0.20550766
## q6 -0.415483246 0.257530285 -0.12704888 0.54354847
## q7 -0.188994125 0.183294087 -0.28227877 -0.21144999
## q8 0.184126459 -0.044778469 0.02337552 0.54455309
## q9 0.391933374 0.149179242 0.55415109 -0.22172530
## q10 -0.317206472 -0.210485999 0.42908926 0.08199983
## q11 0.461215962 0.389990957 -0.44676003 -0.06386516
## q12 0.208188483 -0.799764452 -0.32157326 -0.04642661
## q13 -0.483133148 -0.008403488 -0.09171193 -0.50447091
## q14 0.118165967 0.074850235 0.30905677 -0.06679319
# Plot the PCA biplot (observations plus variable loadings).
library(devtools)
## Loading required package: usethis
# ggbiplot is not on CRAN; install once from GitHub if missing:
#install_github("vqv/ggbiplot")
library(ggbiplot)
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## Loading required package: grid
# NOTE(review): ggbiplot() already sets a coordinate system, so adding
# coord_equal() triggers the "replacing coordinate system" message below.
ggbiplot(student_pca) + coord_equal(ratio = 0.4)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
I did the same analysis for the faculty survey as for the student survey. Results can be found in the following section.
# Faculty Q1: count each non-blank answer (all levels kept).
faculty_q1 <- data.frame(table(faculty[faculty$q1!='',]$q1))
faculty_q1$prop <- faculty_q1$Freq/sum(faculty_q1$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
faculty_q1$sd <- sqrt(faculty_q1$prop*(1-faculty_q1$prop)*sum(faculty_q1$Freq))
ggplot(faculty_q1) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q1: What is your preferred mode of teaching?") +
  xlab("") +
  coord_flip()
# Faculty Q6: Likert item; relevel so bars appear in easy -> difficult order.
faculty_q6 <- data.frame(table(faculty[faculty$q6!='',]$q6))
faculty_q6$Var1 <- factor(faculty_q6$Var1, levels = c("Very easy", "Easy",
                                                     "Neither difficult nor easy", "Difficult",
                                                     "Very difficult"))
faculty_q6$prop <- faculty_q6$Freq/sum(faculty_q6$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
faculty_q6$sd <- sqrt(faculty_q6$prop*(1-faculty_q6$prop)*sum(faculty_q6$Freq))
ggplot(faculty_q6) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q6: How easy was it connecting with students in small groups \n over Zoom?") +
  xlab("") +
  coord_flip()
# Faculty Q9: agreement item; relevel so bars appear in agree -> disagree order.
faculty_q9 <- data.frame(table(faculty[faculty$q9!='',]$q9))
faculty_q9 <- faculty_q9[-1,]
faculty_q9$Var1 <- factor(faculty_q9$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                     "Disagree", "Strongly disagree"))
faculty_q9$prop <- faculty_q9$Freq/sum(faculty_q9$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
faculty_q9$sd <- sqrt(faculty_q9$prop*(1-faculty_q9$prop)*sum(faculty_q9$Freq))
ggplot(faculty_q9) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  ggtitle("Q9: The virtual curriculum affected your ability for self care") +
  xlab("") +
  coord_flip()
# Faculty Q12: agreement item; relevel so bars appear in agree -> disagree order.
faculty_q12 <- data.frame(table(faculty[faculty$q12!='',]$q12))
faculty_q12 <- faculty_q12[-1,]
faculty_q12$Var1 <- factor(faculty_q12$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                       "Disagree", "Strongly disagree"))
faculty_q12$prop <- faculty_q12$Freq/sum(faculty_q12$Freq)
# SD of a multinomial count is sqrt(n*p*(1-p)) with n = total respondents.
# (Bug fix: the original multiplied by the per-option count Freq, not the total.)
faculty_q12$sd <- sqrt(faculty_q12$prop*(1-faculty_q12$prop)*sum(faculty_q12$Freq))
ggplot(faculty_q12) +
  geom_col(aes(x=Var1, y=Freq), fill="skyblue", alpha=0.5) +
  geom_errorbar(aes(x=Var1, ymin=Freq-sd*1.96, ymax=Freq+sd*1.96), width=0.4, colour="orange") +
  # NOTE(review): this title duplicates Q9's title verbatim -- likely a
  # copy-paste error; confirm the actual faculty Q12 wording before re-knit.
  ggtitle("Q12: The virtual curriculum affected your ability for self care") +
  xlab("") +
  coord_flip()
# Word cloud for the faculty "Q1: other" free-text responses.
faculty_q1_other <- faculty$q1_other
docs <- Corpus(VectorSource(faculty_q1_other))
# Normalize: strip numbers/punctuation, squeeze whitespace, lower-case,
# remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
# Word cloud for faculty Q4 (open-ended).
faculty_q4 <- faculty$q4
docs <- Corpus(VectorSource(faculty_q4))
# Normalize: strip numbers/punctuation, squeeze whitespace, lower-case,
# remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
df <- df[-1, ]  # drop the single most frequent term -- presumably uninformative; verify
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
# Word cloud for the faculty Q12 "please explain" free-text responses.
faculty_q12_explain <- faculty$q12_explain
docs <- Corpus(VectorSource(faculty_q12_explain))
# Normalize: strip numbers/punctuation, squeeze whitespace, lower-case,
# remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
# Word cloud for faculty Q16 (open-ended).
faculty_q16 <- faculty$q16
docs <- Corpus(VectorSource(faculty_q16))
# Normalize: strip numbers/punctuation, squeeze whitespace, lower-case,
# remove English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Term frequencies summed across all responses.
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
set.seed(1234)  # fixed seed so the cloud layout is reproducible
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
Similar to the student survey, the PCA analysis does not find any obvious clusters in the survey results. The sample size and the number of questions in the faculty survey are both small, so meaningful structure is hard to detect.
# Convert the ordered categorical answers to numeric scores for PCA.
# Bug fix (review): as.numeric() on a factor returns the internal level
# codes 1..k, not the assigned labels, so the original reverse-coding via
# `levels(f) <- ...` was silently ignored. Round-tripping through
# as.character() applies the intended labels. The printed rotation below
# reflects the pre-fix coding and will change on re-knit.
recode_numeric <- function(x, new_levels) {
  f <- as.factor(x)  # tolerate character columns (stringsAsFactors = FALSE)
  levels(f) <- new_levels
  as.numeric(as.character(f))
}
faculty_num <- faculty[c(5:7)]
faculty_num$q6 <- recode_numeric(faculty_num$q6, c(0,1,2,3,4,5))
faculty_num$q9 <- recode_numeric(faculty_num$q9, c(0,5,4,3,2,1))
faculty_num$q12 <- recode_numeric(faculty_num$q12, c(0,5,4,3,2,1))
# Principal components on standardized scores.
faculty_pca <- prcomp(faculty_num, center = TRUE, scale. = TRUE)
faculty_pca
## Standard deviations (1, .., p=3):
## [1] 1.1746540 0.9904341 0.7995176
##
## Rotation (n x k) = (3 x 3):
## PC1 PC2 PC3
## q6 -0.6971962 0.04798775 0.7152725
## q9 -0.3158145 -0.91627945 -0.2463599
## q12 -0.6435672 0.39765456 -0.6539817
# Plot the PCA biplot for the faculty survey.
library(ggbiplot)  # already attached above; harmless no-op if loaded
# NOTE(review): coord_equal() replaces ggbiplot's coordinate system,
# producing the message below.
ggbiplot(faculty_pca) + coord_equal(ratio = 0.4)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
In this section, I use different methods to analyze the relationship between the student and faculty surveys. This shows whether students and faculty have similar opinions on how the pandemic influenced their academic activities and level of wellness.
I first used a grouped bar plot with the 95% confidence interval to check if students and faculty members chose the same option for a specific question. For each option, I use bars with different colors to show the ratio of students or faculty members who chose it. If the confidence intervals do not overlap, it means the ratio of students who chose this option is significantly different from the ratio of faculty members who chose it.
# Compare student vs faculty answer proportions for Q1.
# faculty_q1[-4,] drops the faculty-only option so both surveys share the
# same 4 categories -- TODO confirm row 4 is the extra option.
merge_q1 <- rbind(student_q1, faculty_q1[-4,])
merge_q1$category <- c(rep("student",4), rep("faculty",4))
# SE of a proportion is sqrt(p*(1-p)/n) with n = that survey's total count.
# (Bug fix: the original divided by the per-option count Freq instead.)
merge_q1$sd <- sqrt(merge_q1$prop*(1-merge_q1$prop)/ave(merge_q1$Freq, merge_q1$category, FUN = sum))
ggplot(merge_q1, aes(fill=category, y=prop, x=Var1)) +
  geom_col(position="dodge") +
  geom_errorbar(aes(x=Var1, ymin=prop-sd*1.96, ymax=prop+sd*1.96), position = position_dodge(0.95), width=0.4, colour="orange") +
  # Typo fix in the displayed title: "leanring" -> "learning".
  ggtitle("Q1: What is your preferred mode of teaching/learning") +
  xlab("") + ylab("Proportion") +
  coord_flip()
# Compare student vs faculty answer proportions for Q6.
merge_q6 <- rbind(student_q6, faculty_q6)
# Pad the faculty side with a zero-count "Very difficult" row so both
# groups have all 5 categories.
de <- data.frame("Very difficult", 0, 0, 0)
names(de) <- c("Var1","Freq", "prop", "sd")
merge_q6 <- rbind(merge_q6, de)
merge_q6$category <- c(rep("student",5), rep("faculty",5))
merge_q6$Var1 <- factor(merge_q6$Var1, levels = c("Very easy", "Easy",
                                                 "Neither difficult nor easy", "Difficult",
                                                 "Very difficult"))
# SE of a proportion is sqrt(p*(1-p)/n) with n = that survey's total count.
# (Bug fix: the original divided by the per-option count Freq, which also
# produced NaN on the padded zero-count row.)
merge_q6$sd <- sqrt(merge_q6$prop*(1-merge_q6$prop)/ave(merge_q6$Freq, merge_q6$category, FUN = sum))
ggplot(merge_q6, aes(fill=category, y=prop, x=Var1)) +
  geom_col(position="dodge") +
  geom_errorbar(aes(x=Var1, ymin=prop-sd*1.96, ymax=prop+sd*1.96), position = position_dodge(0.95), width=0.4, colour="orange") +
  ggtitle("Q6: How easy was it connecting with faculty/students in small groups over Zoom?") +
  xlab("") + ylab("Proportion") +
  coord_flip()
# Compare student vs faculty answer proportions for Q9.
merge_q9 <- rbind(student_q9, faculty_q9)
# Pad the faculty side with a zero-count "Agree" row so both groups have
# all 5 categories.
de <- data.frame("Agree", 0, 0, 0)
names(de) <- c("Var1","Freq", "prop", "sd")
merge_q9 <- rbind(merge_q9, de)
merge_q9$Var1 <- factor(merge_q9$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                 "Disagree", "Strongly disagree"))
merge_q9$category <- c(rep("student",5), rep("faculty",5))
# SE of a proportion is sqrt(p*(1-p)/n) with n = that survey's total count.
# (Bug fix: the original divided by the per-option count Freq, which also
# produced NaN on the padded zero-count row.)
merge_q9$sd <- sqrt(merge_q9$prop*(1-merge_q9$prop)/ave(merge_q9$Freq, merge_q9$category, FUN = sum))
ggplot(merge_q9, aes(fill=category, y=prop, x=Var1)) +
  geom_col(position="dodge") +
  geom_errorbar(aes(x=Var1, ymin=prop-sd*1.96, ymax=prop+sd*1.96), position = position_dodge(0.95), width=0.4, colour="orange") +
  ggtitle("Q9: The virtual curriculum affected your ability for self care") +
  xlab("") + ylab("Proportion") +
  coord_flip()
# Compare student vs faculty answer proportions for Q12.
merge_q12 <- rbind(student_q12, faculty_q12)
# Pad the faculty side with a zero-count "Agree" row so both groups have
# all 5 categories.
de <- data.frame("Agree", 0, 0, 0)
names(de) <- c("Var1","Freq", "prop", "sd")
merge_q12 <- rbind(merge_q12, de)
merge_q12$Var1 <- factor(merge_q12$Var1, levels = c("Strongly agree", "Agree", "Neutral",
                                                   "Disagree", "Strongly disagree"))
merge_q12$category <- c(rep("student",5), rep("faculty",5))
# SE of a proportion is sqrt(p*(1-p)/n) with n = that survey's total count.
# (Bug fix: the original divided by the per-option count Freq, which also
# produced NaN on the padded zero-count row.)
merge_q12$sd <- sqrt(merge_q12$prop*(1-merge_q12$prop)/ave(merge_q12$Freq, merge_q12$category, FUN = sum))
ggplot(merge_q12, aes(fill=category, y=prop, x=Var1)) +
  geom_col(position="dodge") +
  geom_errorbar(aes(x=Var1, ymin=prop-sd*1.96, ymax=prop+sd*1.96), position = position_dodge(0.95), width=0.4, colour="orange") +
  # Typo fixes in the displayed title: "Speciality" -> "Specialty",
  # "inperson" -> "in-person".
  ggtitle("Q12: Specialty was affected by virtual versus in-person interactions") +
  xlab("") + ylab("Proportion") +
  coord_flip()
Then I use hypothesis testing to check the mean difference of each question between students and faculty members. I first convert the answers of each multiple-choice question into numbers, and then calculate the average value of each question. Finally, I use hypothesis testing to detect differences between the student and faculty surveys. If the p-value in a test is smaller than 0.05, the mean value of a question in the student survey is significantly different from that in the faculty survey.
# Common questions (q1, q6, q9, q12) from the student survey, recoded to
# numeric scores for the rank tests below.
# Bug fix (review): as.numeric() on a factor returns internal level codes,
# not the relabeled values; going through as.character() applies the
# intended labels (this matters for the reverse-coded q9/q12 scales).
recode_numeric <- function(x, new_levels) {
  f <- as.factor(x)  # tolerate character columns (stringsAsFactors = FALSE)
  levels(f) <- new_levels
  as.numeric(as.character(f))
}
student_common <- student[,c(3,8,11,14)]
student_common$q1 <- recode_numeric(student_common$q1, c(0,1,2,3,4))
student_common$q6 <- recode_numeric(student_common$q6, c(0,1,2,3,4,5))
student_common$q9 <- recode_numeric(student_common$q9, c(0,5,4,3,2,1))
student_common$q12 <- recode_numeric(student_common$q12, c(0,5,4,3,2,1))
# Common questions from the faculty survey, recoded to numeric scores.
# Bug fix (review): as.numeric() on a factor returns internal level codes,
# not the relabeled values; going through as.character() applies the
# intended labels (critical for q1's non-monotone mapping c(1,2,3,0,4)).
recode_numeric <- function(x, new_levels) {
  f <- as.factor(x)  # tolerate character columns (stringsAsFactors = FALSE)
  levels(f) <- new_levels
  as.numeric(as.character(f))
}
faculty_common <- faculty[,c(2,5:7)]
faculty_common$q1 <- recode_numeric(faculty_common$q1, c(1,2,3,0,4))
faculty_common$q6 <- recode_numeric(faculty_common$q6, c(0,1,2,3,4,5))
faculty_common$q9 <- recode_numeric(faculty_common$q9, c(0,5,4,3,2,1))
faculty_common$q12 <- recode_numeric(faculty_common$q12, c(0,5,4,3,2,1))
# Q1: two-sided Wilcoxon rank-sum (Mann-Whitney) test of student vs faculty
# scores -- a rank test is appropriate for these ordinal recodes.
wilcox.test(student_common$q1, faculty_common$q1, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: student_common$q1 and faculty_common$q1
## W = 2134, p-value = 1.116e-07
## alternative hypothesis: true location shift is not equal to 0
# Q6: same rank-sum comparison (ease of connecting in small groups).
wilcox.test(student_common$q6, faculty_common$q6, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: student_common$q6 and faculty_common$q6
## W = 1542, p-value = 0.1259
## alternative hypothesis: true location shift is not equal to 0
# Q9: same rank-sum comparison (effect on ability for self care).
wilcox.test(student_common$q9, faculty_common$q9, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: student_common$q9 and faculty_common$q9
## W = 1405, p-value = 0.5257
## alternative hypothesis: true location shift is not equal to 0
# Q12: same rank-sum comparison.
wilcox.test(student_common$q12, faculty_common$q12, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: student_common$q12 and faculty_common$q12
## W = 1268.5, p-value = 0.8176
## alternative hypothesis: true location shift is not equal to 0